#!/usr/bin/python
# -*- coding: utf-8 -*-
import os

from 神方.util.date_util import *
from 神方.util.http_util import get_request
from 神方.util.write_excel import write_file

# 根据比较最后一条记录的日期来获取下一页的url
def get_last_date(trs, now_year, start_time):
    # 当前总条数
    num = len(trs)
    # 获取到最后一条的日期
    lastDate = now_year + '-' + trs[num - 1]('td')[4].string
    print('河南省最后一条的日期是：%s' % lastDate)
    # 日期转化为int比较
    lastDate = time.mktime(time.strptime(lastDate, "%Y-%m-%d"))
    lastDiff = int(lastDate) - int(start_time)
    if lastDiff >= 0:
        return 1
    else:
        return 0

def get_henan_value(url, pageNum, list_datas, now_year, start_time):
    soup = get_request(url)
    # 根据标签获取数据
    trs = soup.find('tbody').findAll('tr')

    for i in trs:
        tds = i('td')
        if len(tds) == 5:
            # 内容
            content = tds[0].a['title']
            # 行业
            industry = tds[1].string
            # 日期
            date = now_year + '-' + tds[4].string
            # 获取当前日期作为结束日期
            end_time = time.mktime(time.strptime(date, "%Y-%m-%d"))
            # 日期转化为int比较
            diff = int(end_time) - int()
            if diff >= 0:
                list_td = ['河南省', '', date, industry, content]
                list_datas.append(list_td)
            else:
                break

    flag = get_last_date(trs, now_year, start_time)
    if flag == 1:
        pageNum = pageNum + 1
        urlNext = 'http://www.zgazxxw.com/hn-001011l772-' + str(pageNum) + '.html'
        get_henan_value(urlNext, pageNum, list_datas, now_year, start_time)

    return list_datas

if __name__ == '__main__':
    url = 'http://www.zgazxxw.com/hn-001011l772-0.html'
    # 获得当前年份
    now_year = get_now_year_date()
    # 获得当前日期的前十天的日期
    now_time_str = get_date(10, '%Y-%m-%d')
    # mktime参数为struc_time,将日期转化为秒，
    start_time = get_offset(now_time_str, '%Y-%m-%d')


    # 创建一个空的列表用来存放数据
    list_datas = []
    pageNum = 0
    list_datas = get_henan_value(url, pageNum, list_datas, now_year, start_time)

    data = {
        "序号": ["省份", "城市", "日期", "行业", "内容"]
    }

    index = 1
    for list in list_datas:
        data[str(index)] = list
        index = index + 1

    # 判断excel文件是否存在
    now_time = get_now_date('%Y%m%d')
    file_path = 'D:\\招投标信息' + now_time_str.replace('-', '') + '-' + now_time + '.xls'
    if os.path.exists(file_path):
        print('删除文件')
        os.remove(file_path)
    # 写入excel文件
    write_file(file_path, data, '招投标信息')



